package com.jackingod.app;

import java.io.BufferedReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Vector;
import java.util.concurrent.CountDownLatch;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/**
 * Work-in-progress page collector built on {@link Thread}.
 *
 * <p>The class currently declares the URL/HTML marker strings, the shared
 * static collections and a latch, but only the "already processed" check of
 * {@link #processParent(URL)} is implemented. {@link #run()} is not
 * overridden and {@link #main(String[])} is empty, so starting an instance
 * does nothing yet.
 */
public class DataCollector001 extends Thread {

	/** Count used to initialise the {@link CountDownLatch} below (presumably the number of worker threads). */
	public static final int THREAD_NUM = 100;

	/** URL prefix for "leaf" pages (judging by the URL, product pages — confirm once the parser is written). */
	private static final String leafToken = 
			"http://www.xiukouwang.com/product";
	/** URL prefix for "parent" pages (judging by the URL, gallery listing pages — confirm once the parser is written). */
	private static final String parentToken = 
			"http://www.xiukouwang.com/gallery--b%2C_ANY__t%2C_ANY_-10--";
	/**
	 * HTML markers not yet referenced by any code in this class. The first
	 * entry is the Chinese label for "item number"; the others are a price
	 * CSS class attribute and an image URL prefix.
	 */
	private static final String[] infoKeys = new String[] {
		"编 号：</span>", 
		"class=\"price1\">", 
		"http://www.biilii.com/pic/products/"};
	/** Not yet referenced; presumably the delimiter that ends a value following one of {@link #infoKeys}. */
	private static final String endToken = "<";
	/** Not yet referenced; its role is not visible from this class. */
	private static final String otherToken = "grid.html";
	/** Not yet referenced; presumably the output directory for collected data. */
	private static final String defaultDir = "D:/crawlerData";
	
	/** Not yet referenced; presumably the queue of URLs still to visit. {@link Vector} is synchronized. */
	private static final List<URL> toDo = new Vector<URL>(800);
	/**
	 * URLs treated as already processed; every access is guarded by
	 * {@code synchronized (finished)}.
	 *
	 * <p>NOTE(review): {@link URL#equals(Object)} and {@link URL#hashCode()}
	 * are documented to perform blocking host name resolution, so lookups in
	 * this set can block on DNS while the lock is held. Consider keying the
	 * set on {@code URL.toExternalForm()} or {@code java.net.URI} instead.
	 */
	private static final Set<URL> finished = new HashSet<URL>(1000);
	/** Not yet referenced. Plain {@link ArrayList}: not thread-safe without external locking. */
	private static final List<Object[]> infos = new ArrayList<Object[]>(13000);
	
	private static final Logger logger = LogManager.getLogger("DataCollector001");
	
	/** Latch initialised to {@link #THREAD_NUM}; nothing counts it down or awaits it yet. */
	private static final CountDownLatch cdl = new CountDownLatch(THREAD_NUM);
	
	/**
	 * Starts processing of a parent page.
	 *
	 * <p>At present the method only checks, under the {@code finished} lock,
	 * whether {@code parent} is already recorded as processed. If it is, a
	 * debug message is logged and the method returns; otherwise an info
	 * message is logged. No page is fetched or parsed yet.
	 *
	 * <p>TODO: the page handling itself is not implemented. Note that nothing
	 * in this class ever adds to {@code finished}, so the "already processed"
	 * branch cannot currently be taken; the real implementation needs to add
	 * {@code parent} to the set inside the same synchronized block as the
	 * check to avoid two threads claiming the same URL.
	 *
	 * @param parent URL of the parent page to process
	 */
	public static void processParent(URL parent) {
		synchronized (finished) {
			if (finished.contains(parent)) {
				// Parameterized form defers URL.toString() until the level is enabled.
				logger.debug("url [{}] has processed.", parent);
				return;
			}
			logger.info("process parent url [{}]", parent);
		}
	}
	
	/**
	 * Program entry point; currently a no-op.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		// TODO: not implemented yet.
	}

}
